/****************

DataAppendix.do
--Creates tables and figures for the data appendix

****************/

*NOTE: SOME VARIABLES MAY HAVE TO BE TRANSFORMED BASED ON THE SPREAD ANALYSIS BELOW

* ITA_state.dta appendix output
* Loads the analysis file; the path is relative to the current working directory.
* Forward slashes are used because Stata accepts them on Windows, macOS and Linux,
* whereas backslash-separated paths only resolve on Windows.
use "Data/AnalysisData/ITA_state.dta", clear

* Preliminary state breakdown: tabulation of state captured to file through logout,
* followed by a bar chart over state saved as a .gph file.
* logout is not a built-in Stata command (presumably the SSC package); it must be installed.

logout, save(Output/DataAppendixOutput/state) replace: ///
	tab state

* saving() now uses forward slashes, matching the logout path above and staying portable.
* NOTE(review): the legend() labels probably have no visible effect, since graph bar
* shows no legend for a single statistic with over() - confirm and drop if so.
graph bar, over(state) title("State of establishment") ///
	legend(label(1 KY) label(2 OH)) ///
	saving(Output/DataAppendixOutput/state.gph, replace)

* ann_empl (log-transformed): 10-bin frequency histogram and summary statistics.
* The documented histogram option is bin(#); Stata only allows option names to be
* abbreviated, not lengthened, so the previous bins(10) spelling is replaced.
* saving() uses forward slashes so the path also resolves outside Windows.

histogram log_ann_empl, bin(10) frequency ///
	xtitle("Log of average number of employees in reporting year") ///
	saving(Output/DataAppendixOutput/log_ann_empl.gph, replace)

* statistics(q) requests the quartiles in addition to mean, sd, min and max.
logout, save(Output/DataAppendixOutput/log_ann_empl) replace: ///
	tabstat log_ann_empl, statistics(mean sd min max q)
 
* total_hours_worked (log-transformed): frequency histogram and summary statistics.
* saving() uses forward slashes so the path resolves on every platform and matches
* the separator already used in the logout path.

histogram log_total_hours_worked, frequency ///
	xtitle("Log of total hours worked by all employees") ///
	saving(Output/DataAppendixOutput/log_tot_hrs.gph, replace)

logout, save(Output/DataAppendixOutput/log_tot_hrs) replace: ///
	tabstat log_total_hours_worked, statistics(mean sd min max q)

* total_dafw_days (log-transformed): frequency histogram and summary statistics.
* saving() uses forward slashes so the path resolves on every platform and matches
* the separator already used in the logout path.

histogram log_total_dafw_days, frequency ///
	xtitle("Log of total number of days away from work") ///
	saving(Output/DataAppendixOutput/log_tot_dafw_days.gph, replace)

logout, save(Output/DataAppendixOutput/log_tot_dafw_days) replace: ///
	tabstat log_total_dafw_days, statistics(mean sd min max q)

* total_injuries (log-transformed): frequency histogram and summary statistics.
* saving() uses forward slashes so the path resolves on every platform and matches
* the separator already used in the logout path.

histogram log_total_injuries, frequency ///
	xtitle("Log of total injuries") ///
	saving(Output/DataAppendixOutput/log_tot_inj.gph, replace)

logout, save(Output/DataAppendixOutput/log_tot_inj) replace: ///
	tabstat log_total_injuries, statistics(mean sd min max q)
	
* inj_per_worker (log-transformed, stored as log_ipw): frequency histogram and summary statistics.
* saving() uses forward slashes so the path resolves on every platform and matches
* the separator already used in the logout path.

histogram log_ipw, frequency ///
	xtitle("Log of injuries per worker") ///
	saving(Output/DataAppendixOutput/log_ipw.gph, replace)

logout, save(Output/DataAppendixOutput/log_ipw) replace: ///
	tabstat log_ipw, statistics(mean sd min max q)

* year: tabulation captured through logout, plus a bar chart over year.
* saving() uses forward slashes so the path resolves on every platform and matches
* the separator already used in the logout path.

logout, save(Output/DataAppendixOutput/year) replace: ///
	tab year

graph bar, over(year) title("Year of observation") ///
	saving(Output/DataAppendixOutput/year.gph, replace)

* state_dummy: tabulation captured through logout, plus a bar chart over the dummy.
* relabel() names the first category OH and the second KY - TODO confirm this matches
* how state_dummy is coded.
* The earlier legend(label(1 KY) label(2 OH)) option was removed: it listed the states
* in the opposite order to relabel(), and a single-statistic bar chart with over()
* presumably draws no legend, so it only served to mislead readers of the code.
* saving() uses forward slashes so the path resolves on every platform.

logout, save(Output/DataAppendixOutput/state_dummy) replace: ///
	tab state_dummy

graph bar, over(state_dummy, relabel(1 "OH" 2 "KY")) ///
	title("State of establishment") ///
	saving(Output/DataAppendixOutput/state_dummy.gph, replace)

* time_dummy_2017: tabulation captured through logout, plus a bar chart over the dummy.
* relabel() names the first category "Pre" and the second "Post" - assumes the dummy
* is 0 before treatment and 1 after; TODO confirm.
* saving() uses forward slashes so the path resolves on every platform.

logout, save(Output/DataAppendixOutput/time_2017) replace: ///
	tab time_dummy_2017

graph bar, over(time_dummy_2017, relabel(1 "Pre" 2 "Post")) ///
	title("Time relative to treatment (2017)") ///
	saving(Output/DataAppendixOutput/time_2017.gph, replace)

* state_time: tabulation captured through logout, plus a bar chart over the interaction term.
* relabel() names the first category "All others" and the second "KY post-treatment" -
* assumes state_time is a 0/1 indicator; TODO confirm.
* saving() uses forward slashes so the path resolves on every platform.

logout, save(Output/DataAppendixOutput/state_time) replace: ///
	tab state_time

graph bar, over(state_time, relabel(1 "All others" 2 "KY post-treatment")) ///
	title("State-time interaction") ///
	saving(Output/DataAppendixOutput/state_time.gph, replace)

* naics_code (stored as naics_code1): tabulation captured through logout, plus a bar chart.
* saving() uses forward slashes so the path resolves on every platform and matches
* the separator already used in the logout path.

logout, save(Output/DataAppendixOutput/naics_code) replace: ///
	tab naics_code1

graph bar, over(naics_code1) title("Industry (NAICS code)") ///
	saving(Output/DataAppendixOutput/naics_code.gph, replace)

* size: tabulation captured through logout, plus a bar chart over the three size bands.
* saving() uses forward slashes so the path resolves on every platform.
* NOTE(review): the third label ">250" leaves out exactly 250 employees while the second
* band stops at 249; the label may need to read "250+" - confirm against how size is coded.

logout, save(Output/DataAppendixOutput/size) replace: ///
	tab size

graph bar, over(size, relabel(1 "<25" 2 "25-249" 3 ">250")) ///
	title("Number of annual average employees") ///
	saving(Output/DataAppendixOutput/size.gph, replace)

* empl - may have to take logs?
* Summary statistics captured through logout, then a 100-bin frequency histogram.
* The documented histogram option is bin(#); Stata only allows option names to be
* abbreviated, not lengthened, so the previous bins(100) spelling is replaced.
* saving() uses forward slashes so the path also resolves outside Windows.

logout, save(Output/DataAppendixOutput/empl) replace: ///
	tabstat empl, statistics(mean sd min max q)

histogram empl, bin(100) frequency ///
	xtitle("Wage and salary employment") ///
	saving(Output/DataAppendixOutput/empl.gph, replace)

* member - may have to take logs?
* Summary statistics captured through logout, then a 100-bin frequency histogram.
* The documented histogram option is bin(#); Stata only allows option names to be
* abbreviated, not lengthened, so the previous bins(100) spelling is replaced.
* saving() uses forward slashes so the path also resolves outside Windows.

logout, save(Output/DataAppendixOutput/member) replace: ///
	tabstat member, statistics(mean sd min max q)

histogram member, bin(100) frequency ///
	xtitle("Employed union members") ///
	saving(Output/DataAppendixOutput/member.gph, replace)

* covered - may have to take logs?
* Summary statistics captured through logout, then a 100-bin frequency histogram.
* The documented histogram option is bin(#); Stata only allows option names to be
* abbreviated, not lengthened, so the previous bins(100) spelling is replaced.
* saving() uses forward slashes so the path also resolves outside Windows.

logout, save(Output/DataAppendixOutput/covered) replace: ///
	tabstat covered, statistics(mean sd min max q)

histogram covered, bin(100) frequency ///
	xtitle("Workers covered by CBA") ///
	saving(Output/DataAppendixOutput/covered.gph, replace)

* pctmem100 - may have to take logs?
* Summary statistics captured through logout, then a 100-bin frequency histogram.
* The documented histogram option is bin(#); Stata only allows option names to be
* abbreviated, not lengthened, so the previous bins(100) spelling is replaced.
* saving() uses forward slashes so the path also resolves outside Windows.

logout, save(Output/DataAppendixOutput/pctmem100) replace: ///
	tabstat pctmem100, statistics(mean sd min max q)

histogram pctmem100, bin(100) frequency ///
	xtitle("Percent of employed workers who are union members") ///
	saving(Output/DataAppendixOutput/pctmem100.gph, replace)

* pctcov100 - may have to take logs?
* Summary statistics captured through logout, then a 100-bin frequency histogram.
* The documented histogram option is bin(#); Stata only allows option names to be
* abbreviated, not lengthened, so the previous bins(100) spelling is replaced.
* saving() uses forward slashes so the path also resolves outside Windows.

logout, save(Output/DataAppendixOutput/pctcov100) replace: ///
	tabstat pctcov100, statistics(mean sd min max q)

histogram pctcov100, bin(100) frequency ///
	xtitle("Percent of employed workers covered by CBA") ///
	saving(Output/DataAppendixOutput/pctcov100.gph, replace)

* construction: tabulation captured through logout, plus a No/Yes bar chart.
* relabel() names the first category "No" and the second "Yes" - assumes a 0/1
* indicator; TODO confirm.
* saving() uses forward slashes so the path resolves on every platform.

logout, save(Output/DataAppendixOutput/construction) replace: ///
	tab construction

graph bar, over(construction, relabel(1 "No" 2 "Yes")) ///
	title("Establishment is in construction industry") ///
	saving(Output/DataAppendixOutput/construction.gph, replace)

* manufacturing: tabulation captured through logout, plus a No/Yes bar chart.
* relabel() names the first category "No" and the second "Yes" - assumes a 0/1
* indicator; TODO confirm.
* saving() uses forward slashes so the path resolves on every platform.

logout, save(Output/DataAppendixOutput/manufacturing) replace: ///
	tab manufacturing

graph bar, over(manufacturing, relabel(1 "No" 2 "Yes")) ///
	title("Establishment is in manufacturing industry") ///
	saving(Output/DataAppendixOutput/manufacturing.gph, replace)

* private: tabulation captured through logout, plus a No/Yes bar chart.
* relabel() names the first category "No" and the second "Yes" - assumes a 0/1
* indicator; TODO confirm.
* saving() uses forward slashes so the path resolves on every platform.

logout, save(Output/DataAppendixOutput/private) replace: ///
	tab private

graph bar, over(private, relabel(1 "No" 2 "Yes")) ///
	title("Establishment is private, non-construction non-manufacturing") ///
	saving(Output/DataAppendixOutput/private.gph, replace)

